Large Event Analysis#
import pandas as pd
import numpy as np
import datetime as dt
# Path to the USGS earthquake catalog export (1960-2023).
csv_file = "../datasets/All (1960-2023).csv"
# low_memory=False reads the file in a single pass so pandas infers one dtype
# per column, silencing the DtypeWarning about mixed-type columns; 'time' is
# kept as str so it can be parsed explicitly in the filtering step.
usgs = pd.read_csv(csv_file, sep=',', lineterminator='\n', dtype={'time': str}, low_memory=False)
Show code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_17248\3011563604.py:2: DtypeWarning: Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
usgs = pd.read_csv(csv_file, sep = ',', lineterminator='\n', dtype={'time':str})
Data Filtering#
Converting the date columns to datetime
Date > 1960-01-01 and < 2023-01-01
Longitude > -123 and < -113
Latitude > 29 and < 39
Show code cell source
# Reduce each raw ISO timestamp to its calendar date ("YYYY-MM-DD" string);
# unparseable values become NaN and fail every comparison below.
usgs["time"] = pd.to_datetime(usgs["time"], errors="coerce").dt.strftime("%Y-%m-%d")
# Re-parse once and keep the datetime64 column: a single vectorized
# to_datetime replaces the original per-row conversion loop and the
# repeated re-parsing inside the date filter.
usgs["time"] = pd.to_datetime(usgs["time"])
# Study window: strictly between 1960-01-01 and 2023-01-01.
usgs = usgs[(usgs["time"] > "1960-01-01") & (usgs["time"] < "2023-01-01")]
# Coerce coordinates and magnitude to numeric (bad values -> NaN, which the
# bounding-box comparisons then drop).
usgs["longitude"] = pd.to_numeric(usgs["longitude"], errors="coerce")
usgs["latitude"] = pd.to_numeric(usgs["latitude"], errors="coerce")
usgs["mag"] = pd.to_numeric(usgs["mag"], errors="coerce")
# Southern-California bounding box: -123 < lon < -113 and 29 < lat < 39.
usgs = usgs[(usgs["longitude"] > -123) & (usgs["longitude"] < -113)]
usgs = usgs[(usgs["latitude"] > 29) & (usgs["latitude"] < 39)]
usgs.head()
| time | latitude | longitude | depth | mag | magType | nst | gap | dmin | rms | ... | updated | place | type | horizontalError | depthError | magError | magNst | status | locationSource | magSource\r | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 240 | 2022-12-31 | 33.397500 | -116.393333 | 3.88 | 4.14 | mw | 132 | 16 | 0.07391 | 0.19 | ... | 2023-09-22T21:50:30.029Z | 16 km N of Borrego Springs, CA | earthquake | 0.1 | 0.38 | NaN | 6 | reviewed | ci | ci\r |
| 241 | 2022-12-31 | 34.355667 | -116.921833 | 4.73 | 3.47 | mw | 121 | 25 | 0.07845 | 0.15 | ... | 2023-03-07T19:00:01.040Z | 11km SSE of Lucerne Valley, CA | earthquake | 0.09 | 0.41 | NaN | 4 | reviewed | ci | ci\r |
| 246 | 2022-12-22 | 37.620167 | -122.025000 | 3.82 | 3.34 | mw | 141 | 16 | NaN | 0.16 | ... | 2023-04-20T04:34:00.806Z | 3km N of Union City, CA | earthquake | 0.1 | 0.17 | NaN | 3 | reviewed | nc | nc\r |
| 262 | 2022-12-17 | 37.918167 | -122.304000 | 5.48 | 3.57 | mw | 170 | 19 | 0.01598 | 0.15 | ... | 2023-07-27T08:15:34.318Z | 1km ENE of El Cerrito, CA | earthquake | 0.1 | 0.17 | NaN | 4 | reviewed | nc | nc\r |
| 263 | 2022-12-13 | 36.604667 | -121.209333 | 8.88 | 3.28 | ml | 67 | 55 | 0.03812 | 0.09 | ... | 2023-02-18T22:04:08.040Z | 10km NW of Pinnacles, CA | earthquake | 0.14 | 0.28 | 0.129 | 72 | reviewed | nc | nc\r |
5 rows × 22 columns
Data Grouping And Merging#
Data is grouped into 1-day chunks; for each day the maximum magnitude and the earthquake multiplicity (event count) are computed, and only the days with the top values are kept
# Daily earthquake multiplicity: number of recorded events per calendar day.
daily = usgs['time'].dt.to_period('D')
usgs_grouped_counts = (
    usgs.groupby(daily)['mag']
    .count()
    .rename('count')
    .reset_index()
)
usgs_grouped_counts['time'] = usgs_grouped_counts['time'].dt.to_timestamp()
# Daily maximum magnitude over the same day buckets.
usgs_grouped_max = usgs.groupby(daily)['mag'].max().reset_index()
usgs_grouped_max['time'] = usgs_grouped_max['time'].dt.to_timestamp()
# Keep only the ten busiest days by event count.
usgs_grouped_counts_top10 = usgs_grouped_counts.nlargest(10, 'count')
usgs_grouped_counts_top10.head(10)
| time | count | |
|---|---|---|
| 10743 | 2019-07-06 | 605 |
| 9330 | 2010-04-05 | 233 |
| 6007 | 1992-06-28 | 226 |
| 3155 | 1979-10-16 | 189 |
| 6008 | 1992-06-29 | 186 |
| 7561 | 1999-10-16 | 181 |
| 6387 | 1994-01-17 | 148 |
| 4000 | 1983-05-03 | 147 |
| 1377 | 1971-02-09 | 145 |
| 3298 | 1980-05-26 | 139 |
# Keep only the ten days with the largest daily-maximum magnitude.
usgs_grouped_max_top10 = usgs_grouped_max.nlargest(10, columns='mag')
usgs_grouped_max_top10.head(10)
| time | mag | |
|---|---|---|
| 6007 | 1992-06-28 | 7.3 |
| 9329 | 2010-04-04 | 7.2 |
| 7561 | 1999-10-16 | 7.1 |
| 10743 | 2019-07-06 | 7.1 |
| 5466 | 1989-10-18 | 6.9 |
| 3999 | 1983-05-02 | 6.7 |
| 6387 | 1994-01-17 | 6.7 |
| 939 | 1968-04-09 | 6.6 |
| 1377 | 1971-02-09 | 6.6 |
| 5040 | 1987-11-24 | 6.6 |
# Half-width of the window used for the before/after event comparisons.
one_week = dt.timedelta(days=7)
usgs_grouped_counts_top10 = usgs_grouped_counts_top10.sort_values(by='time', ascending=True)
# BUG FIX: the original line sorted usgs_grouped_counts_top10 here as well,
# silently replacing the top-10-magnitude table with the top-10-count table;
# sort the magnitude table instead.
usgs_grouped_max_top10 = usgs_grouped_max_top10.sort_values(by='time', ascending=True)
Time Before/After Large Events#
import plotly.express as px
import plotly.graph_objects as go
Time before/after a day of high earthquake multiplicity
Show code cell source
# Daily counts within a one-week window around each of the ten busiest days.
# days_until_large_value is (event day - sample day) in whole days, so days
# before the event are positive and days after it are negative.
window_frames = []
for _, event in usgs_grouped_counts_top10.iterrows():
    event_day = event['time']
    start, end = event_day - one_week, event_day + one_week
    before = usgs_grouped_counts[
        (usgs_grouped_counts['time'] >= start) & (usgs_grouped_counts['time'] < event_day)
    ].copy()
    before['days_until_large_value'] = (event_day - before['time']).dt.days
    after = usgs_grouped_counts[
        (usgs_grouped_counts['time'] >= event_day) & (usgs_grouped_counts['time'] <= end)
    ].copy()
    after['days_until_large_value'] = (event_day - after['time']).dt.days
    window_frames.extend([before, after])
filtered_data_df = pd.concat(window_frames)
# One scatter trace: x = temporal offset, y/color = per-day event count.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=filtered_data_df['days_until_large_value'],
    y=filtered_data_df['count'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['count'],
        colorscale='Viridis',
        colorbar=dict(title='Number of Earthquakes'),
    ),
    text=filtered_data_df['time'],
))
fig.update_layout(
    title='Top 10 Highest Earthquake Counts, 1 Week Before/After',
    xaxis=dict(title='Days Until Large Value'),
    yaxis=dict(title='Number of Earthquakes'),
    width=900,
    height=600,
    dragmode='pan',
)
# The title set here supersedes the placeholder axis title above.
fig.update_xaxes(title_text='Days Before/After Spike', rangeslider_visible=True)
fig.show()
Time before/after a large earthquake
Show code cell source
# Daily maximum magnitudes within a one-week window around each event of
# usgs_grouped_max_top10; positive offsets are before the event, negative
# offsets after it.
window_frames = []
for _, event in usgs_grouped_max_top10.iterrows():
    event_day = event['time']
    start, end = event_day - one_week, event_day + one_week
    before = usgs_grouped_max[
        (usgs_grouped_max['time'] >= start) & (usgs_grouped_max['time'] < event_day)
    ].copy()
    before['days_until_large_value'] = (event_day - before['time']).dt.days
    after = usgs_grouped_max[
        (usgs_grouped_max['time'] >= event_day) & (usgs_grouped_max['time'] <= end)
    ].copy()
    after['days_until_large_value'] = (event_day - after['time']).dt.days
    window_frames.extend([before, after])
filtered_data_df = pd.concat(window_frames)
# One scatter trace: x = temporal offset, y/color = daily max magnitude.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=filtered_data_df['days_until_large_value'],
    y=filtered_data_df['mag'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['mag'],
        colorscale='Viridis',
        colorbar=dict(title='Magnitude'),
    ),
    text=filtered_data_df['time'],
))
fig.update_layout(
    title='Top 10 Largest Earthquakes, 1 Week Before/After',
    xaxis=dict(title='Days Before/After Spike'),
    yaxis=dict(title='Magnitude'),
    width=900,
    height=600,
    dragmode='pan',
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()
Earthquake Locations During Spikes#
Earthquake locations before/after a day with a large number of earthquakes
Show code cell source
# Raw event locations within one week of each top-count day. Marker size
# encodes magnitude; color encodes the day offset from the event.
window_frames = []
for _, event in usgs_grouped_counts_top10.iterrows():
    event_day = event['time']
    start, end = event_day - one_week, event_day + one_week
    before = usgs[(usgs['time'] >= start) & (usgs['time'] < event_day)].copy()
    before['days_until_large_value'] = (event_day - before['time']).dt.days
    after = usgs[(usgs['time'] >= event_day) & (usgs['time'] <= end)].copy()
    after['days_until_large_value'] = (event_day - after['time']).dt.days
    # Tag both halves with the event day so each event becomes its own trace.
    before['time_group'] = str(event_day)
    after['time_group'] = str(event_day)
    window_frames.extend([before, after])
filtered_data_df = pd.concat(window_frames)
fig = go.Figure()
color_scale_min = -6
color_scale_max = 6
# Shared horizontal colorbar configuration for every trace.
colorbar_cfg = dict(
    tickfont=dict(size=12),
    x=0.5,
    y=-0.2,
    orientation='h',
    len=1.0,
    title='Days +/- Large Event',
)
for time_group in filtered_data_df['time_group'].unique():
    subset_df = filtered_data_df[filtered_data_df['time_group'] == time_group]
    fig.add_trace(go.Scatter(
        x=subset_df['longitude'],
        y=subset_df['latitude'],
        mode='markers',
        marker=dict(
            size=subset_df['mag'],
            sizemode='diameter',
            sizeref=0.4,
            color=subset_df['days_until_large_value'],
            symbol='circle',
            colorbar=colorbar_cfg,
            showscale=True,
            colorscale='Viridis',
            cmin=color_scale_min,
            cmax=color_scale_max,
        ),
        text=subset_df['time'],
        name=str(time_group),
    ))
fig.update_layout(
    width=800,
    height=800,
    title='Earthquake Locations Before/After Large Count Of Earthquakes',
    xaxis=dict(title='Longitude'),
    yaxis=dict(title='Latitude'),
    dragmode='pan',
    legend=dict(title='Time Groups', font=dict(size=12)),
)
fig.show()
Earthquake Locations Before/After a large earthquake
Show code cell source
# Raw event locations within one week of each top-magnitude day. Marker size
# encodes magnitude; color encodes the day offset from the event.
window_frames = []
for _, event in usgs_grouped_max_top10.iterrows():
    event_day = event['time']
    start, end = event_day - one_week, event_day + one_week
    before = usgs[(usgs['time'] >= start) & (usgs['time'] < event_day)].copy()
    before['days_until_large_value'] = (event_day - before['time']).dt.days
    after = usgs[(usgs['time'] >= event_day) & (usgs['time'] <= end)].copy()
    after['days_until_large_value'] = (event_day - after['time']).dt.days
    # Tag both halves with the event day so each event becomes its own trace.
    before['time_group'] = str(event_day)
    after['time_group'] = str(event_day)
    window_frames.extend([before, after])
filtered_data_df = pd.concat(window_frames)
fig = go.Figure()
color_scale_min = -6
color_scale_max = 6
# Shared horizontal colorbar configuration for every trace.
colorbar_cfg = dict(
    tickfont=dict(size=12),
    x=0.5,
    y=-0.2,
    orientation='h',
    len=1.0,
    title='Days +/- Large Event',
)
for time_group in filtered_data_df['time_group'].unique():
    subset_df = filtered_data_df[filtered_data_df['time_group'] == time_group]
    fig.add_trace(go.Scatter(
        x=subset_df['longitude'],
        y=subset_df['latitude'],
        mode='markers',
        marker=dict(
            size=subset_df['mag'],
            sizemode='diameter',
            sizeref=0.4,
            color=subset_df['days_until_large_value'],
            symbol='circle',
            colorbar=colorbar_cfg,
            showscale=True,
            colorscale='Viridis',
            cmin=color_scale_min,
            cmax=color_scale_max,
        ),
        text=subset_df['time'],
        name=str(time_group),
    ))
fig.update_layout(
    width=800,
    height=800,
    title='Earthquake Locations Before/After Large Earthquake',
    xaxis=dict(title='Longitude'),
    yaxis=dict(title='Latitude'),
    dragmode='pan',
    legend=dict(title='Time Groups', font=dict(size=12)),
)
fig.show()
Energy Calculation And Filtering#
Converting the date columns to datetime
Date > 1960-01-01 and < 2023-01-01
Longitude > -123 and < -113
Latitude > 29 and < 39
Converting the magnitudes to energy through the formula: (1/1.5) * ln(10^(1.5*mag))
# Re-read the full catalog (with time-of-day) for the energy analysis.
csv_file = "../datasets/All (1960-2023).csv"
# low_memory=False avoids the mixed-dtype chunking warning seen on import;
# 'time' is read as str and parsed explicitly just below.
usgs_energy = pd.read_csv(csv_file, sep=',', lineterminator='\n', dtype={'time': str}, low_memory=False)
usgs_energy["time"] = pd.to_datetime(usgs_energy["time"], errors="coerce")
usgs_energy['mag'] = pd.to_numeric(usgs_energy['mag'], errors='coerce')
Show code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_17248\317267901.py:2: DtypeWarning:
Columns (1,2,3,4,6,7,8,9,15,16,17,18) have mixed types. Specify dtype option on import or set low_memory=False.
Show code cell source
# Apply the same study window and Southern-California bounding box used for
# the main catalog.
usgs_energy = usgs_energy[(usgs_energy['time'] > '1960-01-01') & (usgs_energy['time'] < '2023-01-01')]
usgs_energy['longitude'] = pd.to_numeric(usgs_energy['longitude'], errors='coerce')
usgs_energy['latitude'] = pd.to_numeric(usgs_energy['latitude'], errors='coerce')
# Bounding box: -123 < lon < -113 and 29 < lat < 39.
usgs_energy = usgs_energy[(usgs_energy['longitude'] > -123) & (usgs_energy['longitude'] < -113)]
usgs_energy = usgs_energy[(usgs_energy['latitude'] > 29) & (usgs_energy['latitude'] < 39)]
# Energy proxy: (1/1.5) * ln(10 ** (1.5 * mag)); np.log is the natural log.
formula_constant = (1 / 1.5)
usgs_energy['energy'] = 10 ** (1.5 * usgs_energy['mag'])
usgs_energy['energy'] = np.log(usgs_energy['energy']) * formula_constant
# The original formatted every timestamp to a string with strftime and then
# re-parsed each one with strptime in a Python-level apply — an expensive
# round-trip with no net effect. A single vectorized to_datetime is
# equivalent and far faster.
usgs_energy['time'] = pd.to_datetime(usgs_energy['time'])
Show code cell source
# Total energy per second-resolution timestamp (events sharing the same
# second are summed together).
per_second = usgs_energy['time'].dt.to_period('S')
usgs_grouped_energy = usgs_energy.groupby(per_second)['energy'].sum().reset_index()
usgs_grouped_energy['time'] = usgs_grouped_energy['time'].dt.to_timestamp()
Show code cell output
C:\Users\Vishal\AppData\Local\Temp\ipykernel_17248\3270407175.py:1: UserWarning:
Converting to PeriodArray/Index representation will drop timezone information.
# Preview the first five rows of the per-timestamp energy table.
usgs_grouped_energy.head(5)
| time | energy | |
|---|---|---|
| 0 | 1960-01-02 22:51:45 | 9.302444 |
| 1 | 1960-01-05 18:01:47 | 6.976833 |
| 2 | 1960-01-07 17:51:32 | 8.381410 |
| 3 | 1960-01-08 06:51:21 | 7.138014 |
| 4 | 1960-01-11 19:08:39 | 8.726798 |
Energy Before/After Large Events#
Show code cell source
# Per-timestamp energy within one week of each top-count day; the x offset
# here is a full timedelta (event day minus sample time), not whole days.
window_frames = []
for _, event in usgs_grouped_counts_top10.iterrows():
    event_day = event['time']
    start, end = event_day - one_week, event_day + one_week
    before = usgs_grouped_energy[
        (usgs_grouped_energy['time'] >= start) & (usgs_grouped_energy['time'] < event_day)
    ].copy()
    before['time_until_large_value'] = event_day - before['time']
    after = usgs_grouped_energy[
        (usgs_grouped_energy['time'] >= event_day) & (usgs_grouped_energy['time'] <= end)
    ].copy()
    after['time_until_large_value'] = event_day - after['time']
    window_frames.extend([before, after])
filtered_data_df = pd.concat(window_frames)
# One scatter trace: x = temporal offset, y/color = summed energy.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=filtered_data_df['time_until_large_value'],
    y=filtered_data_df['energy'],
    mode='markers',
    marker=dict(
        size=6,
        color=filtered_data_df['energy'],
        colorscale='Viridis',
        colorbar=dict(title='Energy'),
    ),
    text=filtered_data_df['time'],
))
fig.update_layout(
    title='Top 10 Largest Earthquake Count Days, 1 Week Before/After (Energy)',
    xaxis=dict(title='Days Before/After Spike'),
    yaxis=dict(title='Energy'),
    width=900,
    height=600,
    dragmode='pan',
)
fig.update_xaxes(rangeslider_visible=True)
fig.show()